Generate data with imperfections

Fit normal HMM with 2 states

Try normal model with 3 states
model <- stan_model("hmm.stan")
data_stan <- list(
N=nrow(df),
dist=df$obs,
K=3,
cost=10,
p_anomalous=0.01,
conc_anomalous=100
)
fit <- optimise_repeat(5, model, data_stan)
df$state_est <- fit$par$state
df$state_est[1] <- 1
g <- df %>%
mutate(state_est=as.factor(state_est)) %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=state_est)) +
xlab("Time") +
ylab(latex2exp::TeX("$\\Y_t")) +
scale_color_brewer("State", palette = "Dark2")
g

# ggsave("../figures/simulated_corrupt_normal_state_3.pdf", g, width = 6,
# height = 3)
Fit a model with rubbish collection state.
data_stan <- list(
N=nrow(df),
dist=df$obs,
K=2,
cost=10,
p_anomalous=0.01,
conc_anomalous=100
)
model <- stan_model("hmm_bernoulli.stan")
fit <- optimise_repeat(5, model, data_stan)
df$state_est <- fit$par$state
df$state_est[1] <- 1
g <- df %>%
mutate(state_est=as.factor(state_est)) %>%
ggplot(aes(x=time, y=obs)) +
geom_line() +
geom_point(aes(colour=state_est)) +
xlab("Time") +
ylab(latex2exp::TeX("$\\Y_t")) +
scale_color_brewer("State", palette = "Dark2")
g

# ggsave("../figures/simulated_corrupt_normal_state_3.pdf", g, width = 6,
# height = 3)
fit$par$phi
[1] 0.01184509
fit$par$mu
[1] 0.9560906 8.0321222
fit$par$sigma
[1] 1.198069 1.321352
2D HMM

Fit 2D robust HMM
data_stan <- list(
N=nrow(df),
dist=x %>% as.matrix(),
K=2,
cost=7,
p_anomalous=0.1,
conc_anomalous=100
)
model <- stan_model("hmm_bernoulli_2d.stan")
fit <- optimise_repeat(10, model, data_stan)
g <- x %>%
mutate(state=fit$par$state) %>%
ggplot(aes(x=V1, y=V2)) +
geom_point(aes(colour=as.factor(state))) +
xlab(latex2exp::TeX("$\\Y_{1t}")) +
ylab(latex2exp::TeX("$\\Y_{2t}")) +
scale_color_brewer("State", palette = "Dark2")
g

# ggsave("../figures/simulated_2d_corrupt_rubbish.pdf", g, width = 6,
# height = 3)
fit_r <- fit
fit$par$mu
[,1] [,2]
[1,] 7.88259312 7.96074249
[2,] -0.04429909 -0.08280655
fit$par$sigma
[1] 1.015293 1.038413
Try standard HMM with 2 states

fit_2 <- fit
fit$par$mu
[,1] [,2]
[1,] 7.5813273 7.6541182
[2,] -0.2256035 -0.2759647
fit$par$sigma
[1] 1.541746 1.581182
Try standard HMM with 3 states

fit_3 <- fit
fit$par$mu
[,1] [,2]
[1,] -0.3135811 -0.372200
[2,] 3.5225315 3.660930
[3,] 7.8946867 7.964446
fit$par$sigma
[1] 1.295070 1.335942
Plot state cleanliness
mu_r <- fit_r$par$mu
mu_r <- mu_r[c(2, 1), ]
mu_2 <- fit_2$par$mu
mu_2 <- mu_2[c(2, 1), ]
mu_3 <- fit_3$par$mu[c(1, 3), ]
mu_true <- matrix(c(0, 0, 8, 8), ncol=2, byrow = TRUE)
tmp <- tribble(
~y1, ~y2, ~state, ~label,
mu_true[1, 1], mu_true[1, 2], 1, "truth",
mu_true[2, 1], mu_true[2, 2], 2, "truth",
mu_r[1, 1], mu_r[1, 2], 1, "rubbish",
mu_r[2, 1], mu_r[2, 2], 2, "rubbish",
mu_2[1, 1], mu_2[1, 2], 1, "2-state normal",
mu_2[2, 1], mu_2[2, 2], 2, "2-state normal",
mu_3[1, 1], mu_3[1, 2], 1, "3-state normal",
mu_3[2, 1], mu_3[2, 2], 2, "3-state normal"
) %>%
mutate(state=as.factor(state))
g <- tmp %>%
ggplot(aes(x=y1, y=y2)) +
geom_point(data=tmp %>% filter(label=="truth"), colour="red") +
geom_point(aes(shape=label), size=2) +
xlab(latex2exp::TeX("$\\Y_{1t}")) +
ylab(latex2exp::TeX("$\\Y_{2t}")) +
scale_color_brewer("State", palette = "Dark2") +
facet_wrap(~state, scales="free")
g
ggsave("../figures/state_cleanliness_mean.pdf", g)
Saving 5.64 x 3.48 in image

Output covariances for visualisation in Mathematica
Apply rubbish collection to real data
df <- read.csv("../data/ElephantsData_ano.csv",sep=";")
df <- df %>%
rename_all(tolower)
df_1 <- df %>%
filter(id == 10) %>%
mutate(dist = dist /max(dist)) %>%
mutate(time=seq_along(x)) %>%
slice(1:4000)
df_1 %>%
select(time, dist, angle) %>%
pivot_longer(-time) %>%
ggplot(aes(x=time, y=value)) +
geom_line() +
facet_wrap(~name, scales="free")

Plot raw data
g <- df_1 %>%
select(time, dist, angle) %>%
rename(Distance=dist, Angle=angle) %>%
pivot_longer(-time) %>%
slice(1:1000) %>%
ggplot(aes(x=time, y=value)) +
geom_line() +
facet_wrap(~name, scales="free") +
xlab("Time") +
ylab("") +
theme(
strip.text=element_text(size=14),
axis.title = element_text(size=14),
legend.title = element_text(size=14),
legend.text = element_text(size=14)
)
g
ggsave("../figures/angle_dist_separate_raw.pdf", g, width = 8, height = 4)

Fit normal model
data_stan <- list(
N=nrow(df_1),
dist=df_1$dist,
angle=df_1$angle,
K=2,
cost=10,
p_anomalous=0.01,
conc_anomalous=100
)
model <- stan_model("hmm_wrapped_cauchy.stan")
DIAGNOSTIC(S) FROM PARSER:
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
B_step[1] ~ normal(...)
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
B_step[2] ~ normal(...)
fit <- optimise_repeat(5, model, data_stan)
fit_n <- fit
g <- df_1 %>%
select(time, dist, angle) %>%
rename(Distance=dist, Angle=angle) %>%
mutate(state=as.factor(fit$par$state)) %>%
pivot_longer(-c(time, state)) %>%
ggplot(aes(x=value, fill=state)) +
stat_density(position="identity", alpha=0.5) +
scale_fill_brewer("State", palette = "Dark2") +
facet_wrap(~name, scales="free") +
xlab("") +
ylab("Density") +
theme(
strip.text=element_text(size=14),
axis.title = element_text(size=14),
legend.title = element_text(size=14),
legend.text = element_text(size=14)
)
g
ggsave("../figures/angle_dist_separate_standard.pdf", g, width = 8, height = 4)

Using rubbish collector
data_stan <- list(
N=nrow(df_1),
dist=df_1$dist,
angle=df_1$angle,
K=2,
cost=0.5,
p_anomalous=0.1,
conc_anomalous=10
)
model <- stan_model("hmm_bernoulli_wrapped_cauchy.stan")
DIAGNOSTIC(S) FROM PARSER:
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
B_step[1] ~ normal(...)
Info:
Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
Left-hand-side of sampling statement:
B_step[2] ~ normal(...)
fit <- optimise_repeat(2, model, data_stan)
fit_r <- fit
g <- df_1 %>%
select(time, dist, angle) %>%
rename(Distance=dist, Angle=angle) %>%
mutate(state=as.factor(fit$par$state)) %>%
pivot_longer(-c(time, state)) %>%
ggplot(aes(x=value, fill=state)) +
stat_density(position="identity", alpha=0.5) +
scale_fill_brewer("State", palette = "Dark2") +
facet_wrap(~name, scales="free") +
xlab("") +
ylab("Density") +
theme(
strip.text=element_text(size=14),
axis.title = element_text(size=14),
legend.title = element_text(size=14),
legend.text = element_text(size=14)
)
g
ggsave("../figures/angle_dist_separate_rubbish.pdf", g, width = 8, height = 4)

g <- df_1 %>%
select(time, dist, angle) %>%
mutate(state=as.factor(fit$par$state)) %>%
ggplot(aes(x=dist, y=angle)) +
geom_point(aes(colour=state), size=1) +
scale_colour_brewer("State", palette = "Dark2") +
xlab("Distance") +
ylab("Angle") +
facet_wrap(~state) +
theme(
strip.text=element_text(size=14),
axis.title = element_text(size=14),
legend.title = element_text(size=14),
legend.text = element_text(size=14)
)
g
ggsave("../figures/angle_dist_2d_rubbish.pdf", g, width = 8, height = 4)

df_1 %>%
select(time, dist, angle) %>%
mutate(state=as.factor(fit$par$state)) %>%
ggplot(aes(x=dist, y=angle)) +
geom_density_2d(aes(color=state))

Get higher correlations between states and covariates when using
cleaned states
tibble(rubbish=a[, 1], original=b[, 1]) %>%
mutate(variable=names(a[, 1])) %>%
dplyr::filter(variable != "state") %>%
pivot_longer(-variable) %>%
ggplot(aes(x=variable, y=value, fill=name)) +
geom_col(position=position_dodge2()) +
scale_fill_brewer("Model", palette="Dark2") +
ylab("Correlation") +
xlab("Covariate")
Warning in gzfile(file, "wb") :
cannot open compressed file '/Users/bcl206/.local/share/rstudio/notebooks/45A48B1F-s_hmm_bernoulli/1/D938F7198062BBDC/c101yb2kwh7r2_t/70df558a59de45338ca21c805896c00e.snapshot', probable reason 'No such file or directory'
Error in gzfile(file, "wb") : cannot open the connection
Error in dev.off() :
QuartzBitmap_Output - unable to open file '/Users/bcl206/.local/share/rstudio/notebooks/45A48B1F-s_hmm_bernoulli/1/D938F7198062BBDC/c101yb2kwh7r2_t/_rs_chunk_plot_001.png'
Is this generally true?
g1
$g1
$g2
$g3
$df
NA



g2
$g1
$g2
$g3
$df
NA



g3
$g1
$g2
$g3
Warning: Removed 6 rows containing missing values (`geom_col()`).
$df
NA



g4
$g1
$g2
$g3
$df
NA



g5
$g1
$g2
$g3
$df
NA



---
title: "HMM Bernoulli"
output: html_notebook
---

```{r}
source("helper.R")
```

Generate data with imperfections
```{r}
mus <- c(1, 8)
sigmas <- c(1, 1)
dfs <- c(1.2, 1.2)
K <- 2
m_transition_probs <- matrix(c(0.9, 0.1, 0.1, 0.9), ncol = 2)

n_steps <- 1000
df <- simulate_hmm(n_steps, 1, m_transition_probs, 0.2, mus, sigmas, dfs)

g <- df %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_corrupt.pdf", g, width = 6,
       height = 3)
```

Fit normal HMM with 2 states
```{r}
model <- stan_model("hmm.stan")

data_stan <- list(
  N=nrow(df),
  dist=df$obs,
  K=2,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)

fit <- optimise_repeat(5, model, data_stan)

# some issue with algo for t=1
df$state_est <- fit$par$state
df$state_est[1] <- 1

g <- df %>%
  mutate(state_est=as.factor(state_est)) %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state_est)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")

g
ggsave("../figures/simulated_corrupt_normal_state.pdf", g, width = 6,
       height = 3)
```

Try normal model with 3 states
```{r}
model <- stan_model("hmm.stan")

data_stan <- list(
  N=nrow(df),
  dist=df$obs,
  K=3,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)

fit <- optimise_repeat(5, model, data_stan)

df$state_est <- fit$par$state
df$state_est[1] <- 1

g <- df %>%
  mutate(state_est=as.factor(state_est)) %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state_est)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")

g

ggsave("../figures/simulated_corrupt_normal_state_3.pdf", g, width = 6,
       height = 3)
```


Fit a model with rubbish collection state.
```{r}
data_stan <- list(
  N=nrow(df),
  dist=df$obs,
  K=2,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)
model <- stan_model("hmm_bernoulli.stan")
fit <- optimise_repeat(5, model, data_stan)

df$state_est <- fit$par$state
df$state_est[1] <- 1

g <- df %>%
  mutate(state_est=as.factor(state_est)) %>% 
  ggplot(aes(x=time, y=obs)) +
  geom_line() +
  geom_point(aes(colour=state_est)) +
  xlab("Time") +
  ylab(latex2exp::TeX("$\\Y_t")) +
  scale_color_brewer("State", palette = "Dark2")

g

ggsave("../figures/simulated_corrupt_rubbish.pdf", g, width = 6,
       height = 3)
```

```{r}
fit$par$phi
fit$par$mu
fit$par$sigma
```


# 2D HMM
```{r}
states <- simulate_hmm_states(n_steps, 1, m_transition_probs)
library(mvtnorm)
rmvrnorm2D <- function(n, mux, muy, sigmax, sigmay, rho){
  return(rmvnorm(n, c(mux, muy),
                 matrix(c(sigmax^2, sigmax * sigmay * rho,
                          sigmax * sigmay * rho, sigmay^2),
                        ncol = 2)))
}
rmvt2D <- function(n, mux, muy, sigmax, sigmay, rho, df){
  return(rmvt(n,
              matrix(c(sigmax^2, sigmax * sigmay * rho,
                       sigmax * sigmay * rho, sigmay^2),
                     ncol = 2),
              df,
              c(mux, muy)))
}

x <- matrix(nrow = length(states), ncol=2)
p1_corruption <- 0.05
p2_corruption <- 0.1
for(i in seq_along(states)) {
  u <- runif(1)
  if(states[i] == 1) {
    if(u < p1_corruption) { 
      x[i, ] <- rmvrnorm2D(1, -5, -5, 2, 2, 0)
    } else {
      x[i, ] <- rmvrnorm2D(1, 0, 0, 1, 1, 0)
    }
  } else {
    if(u < p2_corruption) { 
      x[i, ] <- rmvrnorm2D(1, 4, 4, 1, 1, 0.8)
    } else {
      x[i, ] <- rmvrnorm2D(1, 8, 8, 1, 1, 0)
    }
  }
}

x <- x %>% 
  as.data.frame()

g <- x %>% 
  mutate(state=states) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_2d_corrupt.pdf", g, width = 6,
       height = 3)
```

Fit 2D robust HMM
```{r}
data_stan <- list(
  N=nrow(df),
  dist=x %>% as.matrix(),
  K=2,
  cost=7,
  p_anomalous=0.1,
  conc_anomalous=100
)
model <- stan_model("hmm_bernoulli_2d.stan")
fit <- optimise_repeat(10, model, data_stan)
fit_r <- fit

g <- x %>% 
  mutate(state=fit_r$par$state) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_2d_corrupt_rubbish.pdf", g, width = 6,
       height = 3)
```

```{r}
fit$par$mu
fit$par$sigma
```

Try standard HMM with 2 states
```{r}
data_stan <- list(
  N=nrow(df),
  dist=x %>% as.matrix(),
  K=2,
  cost=7,
  p_anomalous=0.1,
  conc_anomalous=100
)

model <- stan_model("hmm_2d.stan")
fit <- optimise_repeat(10, model, data_stan)
fit_2 <- fit

# manual state cleaning
state_1 <- fit_2$par$state
state_c <- if_else(state_1==1, 2, 1)

g <- x %>% 
  mutate(state=state_c) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state_c))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_2d_corrupt_normal_2.pdf", g, width = 6,
       height = 3)
```

```{r}
fit$par$mu
fit$par$sigma
```


Try standard HMM with 3 states
```{r}
data_stan <- list(
  N=nrow(df),
  dist=x %>% as.matrix(),
  K=3,
  cost=7,
  p_anomalous=0.1,
  conc_anomalous=100
)

model <- stan_model("hmm_2d.stan")
fit <- optimise_repeat(10, model, data_stan)
fit_3 <- fit

# manual state cleaning
state_1 <- fit_3$par$state
state_c <- if_else(state_1==1, if_else(state_1), 1)

g <- x %>% 
  mutate(state=state_c) %>% 
  ggplot(aes(x=V1, y=V2)) +
  geom_point(aes(colour=as.factor(state_1))) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2")
g

ggsave("../figures/simulated_2d_corrupt_normal_3.pdf", g, width = 6,
       height = 3)
```

```{r}
fit$par$mu
fit$par$sigma
```

Plot state cleanliness
```{r}
mu_r <- fit_r$par$mu
mu_r <- mu_r[c(2, 1), ]
mu_2 <- fit_2$par$mu
mu_2 <- mu_2[c(2, 1), ]
mu_3 <- fit_3$par$mu[c(1, 3), ]
mu_true <- matrix(c(0, 0, 8, 8), ncol=2, byrow = TRUE)

tmp <- tribble(
  ~y1, ~y2, ~state, ~label,
  mu_true[1, 1], mu_true[1, 2], 1, "truth",
  mu_true[2, 1], mu_true[2, 2], 2, "truth",
  mu_r[1, 1], mu_r[1, 2], 1, "rubbish",
  mu_r[2, 1], mu_r[2, 2], 2, "rubbish",
  mu_2[1, 1], mu_2[1, 2], 1, "2-state normal",
  mu_2[2, 1], mu_2[2, 2], 2, "2-state normal",
  mu_3[1, 1], mu_3[1, 2], 1, "3-state normal",
  mu_3[2, 1], mu_3[2, 2], 2, "3-state normal"
) %>% 
  mutate(state=as.factor(state))

g <- tmp %>% 
  ggplot(aes(x=y1, y=y2)) +
  geom_point(data=tmp %>% filter(label=="truth"), colour="red") +
  geom_point(aes(shape=label), size=2) +
  xlab(latex2exp::TeX("$\\Y_{1t}")) +
  ylab(latex2exp::TeX("$\\Y_{2t}")) +
  scale_color_brewer("State", palette = "Dark2") +
  facet_wrap(~state, scales="free")
g

ggsave("../figures/state_cleanliness_mean.pdf", g)
```
Output covariances for visualisation in Mathematica
```{r}
sigma_r <- fit_r$par$sigma
sigma_2 <- fit_2$par$sigma
sigma_3 <- fit_3$par$sigma
```


# Apply rubbish collection to real data
```{r}
df <- read.csv("../data/ElephantsData_ano.csv",sep=";")
df <- df %>% 
  rename_all(tolower)
```


```{r}
df_1 <- df %>% 
  filter(id == 10) %>% 
  mutate(dist = dist /max(dist)) %>% 
  mutate(time=seq_along(x)) %>% 
  slice(1:4000)

df_1 %>% 
  select(time, dist, angle) %>% 
  pivot_longer(-time) %>% 
  ggplot(aes(x=time, y=value)) +
  geom_line() +
  facet_wrap(~name, scales="free")
```

Plot raw data
```{r}
g <- df_1 %>% 
  select(time, dist, angle) %>% 
  rename(Distance=dist, Angle=angle) %>% 
  pivot_longer(-time) %>% 
  slice(1:1000) %>% 
  ggplot(aes(x=time, y=value)) +
  geom_line() +
  facet_wrap(~name, scales="free") +
  xlab("Time") +
  ylab("") +
  theme(
    strip.text=element_text(size=14),
    axis.title = element_text(size=14),
    legend.title = element_text(size=14),
    legend.text = element_text(size=14)
  )
g
ggsave("../figures/angle_dist_separate_raw.pdf", g, width = 8, height = 4)
```


Fit normal model
```{r}
data_stan <- list(
  N=nrow(df_1),
  dist=df_1$dist,
  angle=df_1$angle,
  K=2,
  cost=10,
  p_anomalous=0.01,
  conc_anomalous=100
)
model <- stan_model("hmm_wrapped_cauchy.stan")
fit <- optimise_repeat(5, model, data_stan)
fit_n <- fit

df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=time, y=value, colour=state)) +
  geom_line(aes(group=1)) +
  facet_wrap(~name, scales="free")
```

```{r}
g <- df_1 %>% 
  select(time, dist, angle) %>% 
  rename(Distance=dist, Angle=angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=value, fill=state)) +
  stat_density(position="identity", alpha=0.5) +
  scale_fill_brewer("State", palette = "Dark2") +
  facet_wrap(~name, scales="free") +
  xlab("") +
  ylab("Density") +
  theme(
    strip.text=element_text(size=14),
    axis.title = element_text(size=14),
    legend.title = element_text(size=14),
    legend.text = element_text(size=14)
  )
g
ggsave("../figures/angle_dist_separate_standard.pdf", g, width = 8, height = 4)
```

Using rubbish collector
```{r}
data_stan <- list(
  N=nrow(df_1),
  dist=df_1$dist,
  angle=df_1$angle,
  K=2,
  cost=0.5,
  p_anomalous=0.1,
  conc_anomalous=10
)
model <- stan_model("hmm_bernoulli_wrapped_cauchy.stan")
fit <- optimise_repeat(2, model, data_stan)
fit_r <- fit

df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=time, y=value, colour=state)) +
  geom_line(aes(group=1)) +
  scale_color_brewer("State", palette = "Dark2") +
  facet_wrap(~name, scales="free")
```


```{r}
g <- df_1 %>% 
  select(time, dist, angle) %>% 
  rename(Distance=dist, Angle=angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=value, fill=state)) +
  stat_density(position="identity", alpha=0.5) +
  scale_fill_brewer("State", palette = "Dark2") +
  facet_wrap(~name, scales="free") +
  xlab("") +
  ylab("Density") +
  theme(
    strip.text=element_text(size=14),
    axis.title = element_text(size=14),
    legend.title = element_text(size=14),
    legend.text = element_text(size=14)
  )
g
ggsave("../figures/angle_dist_separate_rubbish.pdf", g, width = 8, height = 4)
```


```{r}
g <- df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  ggplot(aes(x=dist, y=angle)) +
  geom_point(aes(colour=state), size=1) +
  scale_colour_brewer("State", palette = "Dark2") +
  xlab("Distance") +
  ylab("Angle") +
  facet_wrap(~state) +
  theme(
    strip.text=element_text(size=14),
    axis.title = element_text(size=14),
    legend.title = element_text(size=14),
    legend.text = element_text(size=14)
  )
g
ggsave("../figures/angle_dist_2d_rubbish.pdf", g, width = 8, height = 4)
```

```{r}
df_1 %>% 
  select(time, dist, angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  ggplot(aes(x=dist, y=angle)) +
  geom_density_2d(aes(color=state))
```

Get higher correlations between states and covariates when using cleaned states
```{r}
df_1a <- df_1 %>% 
  mutate(state=fit_r$par$state) %>% 
  filter(state != 3) %>% 
  select(state, field, river, trees, timeofday, corridor, season)

a <- cor(df_1a)

df_1b <- df_1 %>% 
  mutate(state=fit_n$par$state) %>% 
  filter(state != 3) %>% 
  select(state, field, river, trees, timeofday, corridor, season)

b <- cor(df_1b)

g <- tibble(rubbish=a[, 1], original=b[, 1]) %>% 
  mutate(variable=names(a[, 1])) %>% 
  dplyr::filter(variable != "state") %>% 
  pivot_longer(-variable) %>% 
  ggplot(aes(x=variable, y=value, fill=name)) +
  geom_col(position=position_dodge2()) +
  scale_fill_brewer("Model", palette="Dark2") +
  ylab("Correlation") +
  xlab("Covariate") +
  theme(axis.title=element_text(size=14),
        legend.title = element_text(size=14),
        legend.text=element_text(size=14),
        axis.text=element_text(size=14))

ggsave("../figures/angle_dist_correlations.pdf", g, width = 8, height = 4)
```

Is this generally true?
```{r}
plot_angle_distance <- function(df_1, fit) {
  df_1 %>% 
  select(time, dist, angle) %>% 
  rename(Distance=dist, Angle=angle) %>% 
  mutate(state=as.factor(fit$par$state)) %>% 
  pivot_longer(-c(time, state)) %>% 
  ggplot(aes(x=value, fill=state)) +
  stat_density(position="identity", alpha=0.5) +
  scale_fill_brewer("State", palette = "Dark2") +
  facet_wrap(~name, scales="free") +
  xlab("") +
  ylab("Density") +
  theme(
    strip.text=element_text(size=14),
    axis.title = element_text(size=14),
    legend.title = element_text(size=14),
    legend.text = element_text(size=14)
  )
}

fit_plot_compare_models <- function(df, num_rows, a_id) {

  df_1 <- df %>% 
    filter(id == a_id) %>% 
    mutate(dist = dist /max(dist)) %>% 
    mutate(time=seq_along(x)) %>% 
    slice(1:num_rows)
  
  data_stan <- list(
    N=nrow(df_1),
    dist=df_1$dist,
    angle=df_1$angle,
    K=2,
    cost=10,
    p_anomalous=0.01,
    conc_anomalous=100
  )
  model <- stan_model("hmm_wrapped_cauchy.stan")
  fit <- optimise_repeat(2, model, data_stan)
  fit_n <- fit
  
  data_stan <- list(
    N=nrow(df_1),
    dist=df_1$dist,
    angle=df_1$angle,
    K=2,
    cost=0.5,
    p_anomalous=0.1,
    conc_anomalous=10
  )
  model <- stan_model("hmm_bernoulli_wrapped_cauchy.stan")
  fit <- optimise_repeat(2, model, data_stan)
  fit_r <- fit
  
  g1 <- plot_angle_distance(df_1, fit_n)
  g2 <- plot_angle_distance(df_1, fit_r)
  
  df_1a <- df_1 %>% 
    mutate(state=fit_r$par$state) %>% 
    filter(state != 3) %>% 
    select(state, field, river, trees, timeofday, corridor, season)
  
  a <- cor(df_1a)
  
  df_1b <- df_1 %>% 
    mutate(state=fit_n$par$state) %>% 
    filter(state != 3) %>% 
    select(state, field, river, trees, timeofday, corridor, season)
  
  b <- cor(df_1b)
  
  df_new <- tibble(new=a[, 1], old=b[, 1]) %>% 
    mutate(variable=names(a[, 1]))
  
  g3 <- df_new %>% 
    dplyr::filter(variable != "state") %>% 
    pivot_longer(-variable) %>% 
    ggplot(aes(x=variable, y=value, fill=name)) +
    geom_col(position=position_dodge2())
  
  list(g1=g1, g2=g2, g3=g3, df=df_new)
}

g1 <- fit_plot_compare_models(df, 10000, 1)
g2 <- fit_plot_compare_models(df, 10000, 2)
g3 <- fit_plot_compare_models(df, 10000, 3)
g4 <- fit_plot_compare_models(df, 10000, 4)
g5 <- fit_plot_compare_models(df, 10000, 5)

g1
g2
g3
g4
g5

```

